R libraries.

library(data.table)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(stringr)
library(cowplot)

library(tidyr)
theme_set(theme_classic())

release_name = params$release_name
release_name
## [1] "2025_06_JAX_RNAseq08"

Read in results

result_dir = file.path("results", release_name)
enrichment_output_dir = file.path(result_dir, "enrichment")

res = readRDS(file.path(enrichment_output_dir, 
                        sprintf("%s_enrichment.rds", release_name)))
names(res)
##  [1] "PrS_day_6_hyp_ASCL2_PTC_up"          "PrS_day_6_hyp_ASCL2_PTC_down"       
##  [3] "PrS_day_6_hyp_ELF5_PTC_up"           "PrS_day_6_hyp_ELF5_PTC_down"        
##  [5] "PrS_day_6_hyp_HIF1A_PTC_up"          "PrS_day_6_hyp_HIF1A_PTC_down"       
##  [7] "PrS_day_6_nor_DLX3_PTC_up"           "PrS_day_6_nor_DLX3_PTC_down"        
##  [9] "PrS_day_6_nor_ELF5_PTC_up"           "PrS_day_6_nor_ELF5_PTC_down"        
## [11] "PrS_day_6_nor_MSX2_PTC_up"           "PrS_day_6_nor_MSX2_PTC_down"        
## [13] "PrS_day_6_nor_NR2F2_PTC_up"          "PrS_day_6_nor_NR2F2_PTC_down"       
## [15] "PrS_day_6_nor_VGLL1_PTC_up"          "PrS_day_6_ASCL2_PTC_hyp_vs_nor_up"  
## [17] "PrS_day_6_ASCL2_PTC_hyp_vs_nor_down" "PrS_day_6_ELF5_PTC_hyp_vs_nor_up"   
## [19] "PrS_day_6_ELF5_PTC_hyp_vs_nor_down"  "PrS_day_6_HIF1A_PTC_hyp_vs_nor_up"  
## [21] "PrS_day_6_HIF1A_PTC_hyp_vs_nor_down" "PrS_day_6_TP63_PTC_hyp_vs_nor_up"   
## [23] "PrS_day_6_TP63_PTC_hyp_vs_nor_down"  "PrS_day_6_WT_WT_hyp_vs_nor_up"      
## [25] "PrS_day_6_WT_WT_hyp_vs_nor_down"
multi_strategy_datasets = c("2023_12_JAX_RNAseq_ExtraEmbryonic",
                            "2024_05_JAX_RNAseq2_ExtraEmbryonic", 
                            "2024_06_JAX_RNAseq_Neuroectoderm", 
                            "2024_09_JAX_RNAseq6_Diverse", 
                            "2024_09_JAX_RNAseq7_Reversion")
  
if (release_name%in%multi_strategy_datasets){
  
  to_plot = grep("PTC|reverted|WT_WT_hyp_vs_nor", names(res))
  res = res[to_plot]

}

sapply(res, function(x){sapply(x,nrow)})
## $PrS_day_6_hyp_ASCL2_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_hyp_ASCL2_PTC_down
## reactome    go_bp 
##        5       10 
## 
## $PrS_day_6_hyp_ELF5_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_hyp_ELF5_PTC_down
## reactome    go_bp 
##        2       10 
## 
## $PrS_day_6_hyp_HIF1A_PTC_up
## go_bp 
##    10 
## 
## $PrS_day_6_hyp_HIF1A_PTC_down
## reactome    go_bp 
##        5       10 
## 
## $PrS_day_6_nor_DLX3_PTC_up
## reactome    go_bp 
##        4       10 
## 
## $PrS_day_6_nor_DLX3_PTC_down
## go_bp 
##     1 
## 
## $PrS_day_6_nor_ELF5_PTC_up
## reactome    go_bp 
##        2       10 
## 
## $PrS_day_6_nor_ELF5_PTC_down
## reactome 
##        1 
## 
## $PrS_day_6_nor_MSX2_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_nor_MSX2_PTC_down
## reactome    go_bp 
##        1       10 
## 
## $PrS_day_6_nor_NR2F2_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_nor_NR2F2_PTC_down
## reactome    go_bp 
##        7       10 
## 
## $PrS_day_6_nor_VGLL1_PTC_up
## go_bp 
##     1 
## 
## $PrS_day_6_ASCL2_PTC_hyp_vs_nor_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_ASCL2_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_ELF5_PTC_hyp_vs_nor_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_ELF5_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_HIF1A_PTC_hyp_vs_nor_up
## reactome    go_bp 
##        9       10 
## 
## $PrS_day_6_HIF1A_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_TP63_PTC_hyp_vs_nor_up
## reactome    go_bp 
##        3       10 
## 
## $PrS_day_6_TP63_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_WT_WT_hyp_vs_nor_up
## go_bp 
##    10 
## 
## $PrS_day_6_WT_WT_hyp_vs_nor_down
## reactome    go_bp 
##       10       10
df_list = list()

for(label in names(res)){
  res1 = res[[label]]
  names(res1)[which(names(res1)=="go_bp")] = "GO biological process"
  type = names(res1)
  
  df_go = NULL
  
  for(gset1 in type){
    res_g1 = res1[[gset1]]
    res_g1$type = gset1
    df_go = rbind(df_go, res_g1)
  }
  
  df_list[[label]] = df_go
}

Prepare output directories for saving figure files out

df_size = NULL

figure_dir = file.path("figures", release_name)
goseq_figure_dir = file.path(figure_dir, "goseq_plots")

if (!file.exists(goseq_figure_dir)){
  dir.create(goseq_figure_dir, recursive = TRUE)
}

Generate plots

nchar_limit = 70

for(k in 1:length(df_list)){
  d1 = df_list[[k]]
  
  if(sum(d1$FDR < 0.05) == 0){ next }
  
  d1 = d1[d1$FDR < 0.05,]
  d1 = d1[order(d1$FDR, decreasing = TRUE),]
  
  d1_label = sapply(d1$category, function(x) {
    space_positions = gregexpr(" ", x)[[1]]
    if (nchar(x) > nchar_limit) {
      break_pos = max(space_positions[space_positions < nchar_limit])
      return(paste0(paste0(rep(" ", nchar_limit-break_pos), collapse=""), 
                    substr(x, 1, break_pos), "\n", 
                    substr(x, break_pos+1, nchar(x))))
    } else {
      x = paste0(paste0(rep(" ", 1.1*(nchar_limit-nchar(x))), collapse=""), x)
      return(x)
    }
  })

  d1$y = 1:nrow(d1)
  nm1 = names(df_list)[k]
  nm1 = gsub("_", " ", nm1)
  nm1 = gsub("PTC ", "", nm1)
  nm1 = gsub("nor ", "Normoxia ", nm1)
  nm1 = gsub("hyp ", "Hypoxia ", nm1)
  nm1 = gsub("up", "up-regulated", nm1)
  nm1 = gsub("down", "down-regulated", nm1)
  
  if (grepl("Hypoxia vs Normoxia", nm1)){
    nm1_substrings = strsplit(nm1, " Hypoxia vs Normoxia ")[[1]]
    nm1 = paste0(nm1_substrings[1], 
                 "\nHypoxia vs Normoxia\n", 
                 nm1_substrings[2])
  }else{
    if (!grepl("EPAS1", nm1)){ # keep Normoxia for comparisons involving EPAS1 gene
      nm1 = gsub("Normoxia ", "", nm1)
    }
    nm1 = sub("reverted ", "", nm1) # only replace the first occurrence of "reverted "
    substrings = str_split(nm1, " ")[[1]]
    nm1 = paste0(paste(substrings[1:(length(substrings)-1)], collapse = " "), 
                 "\n", paste(substrings[length(substrings)], collapse = " "))
  }
  
  mycolors <- c("GO biological process" = "#F8766D", "reactome" = "#00BFC4")
  filtered_colors <- mycolors[unique(d1$type)]
  
  g1 = ggplot(d1, aes(x = -log10(FDR), y = y, color=type)) +
    geom_point(size = 2.5) + 
    labs(x = "-log10(FDR)", y="", title = nm1) + 
    scale_y_continuous(breaks = 1:nrow(d1), 
                       labels = d1_label, 
                       limits = c(0.8, nrow(d1)+0.2)) + 
    theme(legend.position = "top") + 
    scale_color_manual(values = filtered_colors) + 
    guides(color = guide_legend(title = NULL))

  h1 = 0.75*(nrow(d1)+0.4)/20 + 0.25
  if (nrow(d1)<=2){
    h1 = 0.75*(nrow(d1)+1)/20 + 0.25
  }
  
  canvas <- ggdraw() + draw_plot(g1, x = 0, y = 1-h1, width = 1, height = h1)    
  print(canvas)
  
  if (!grepl("Hypoxia vs Normoxia", nm1)){
    
    g2 = ggplot(d1, aes(x = -log10(FDR), y = y, color=type)) +
      geom_point(size = 2.5) + 
      labs(x = "-log10(FDR)", y="") + 
      scale_y_continuous(breaks = 1:nrow(d1), 
                         labels = d1_label, 
                         limits = c(0.8, nrow(d1)+0.2)) + 
      theme(legend.position = "top") + 
      scale_color_manual(values = filtered_colors) + 
      guides(color = guide_legend(title = NULL)) + 
      theme(
        panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.background = element_rect(fill = "transparent", color = NA)
       )
    
    h1 = 0.75*(nrow(d1)+0.4)/20 + 0.25
    if (nrow(d1)<=2){
      h1 = 0.75*(nrow(d1)+1)/20 + 0.25
    }
    
    canvas2 <- ggdraw() + draw_plot(g2, x = 0, y = 1-h1, width = 1, height = h1)    
    
    saved_figure_name = gsub("\n", "_", nm1)
    saved_figure_name = paste0(gsub(" ", "_", saved_figure_name), ".svg")
  
    ggsave(file=file.path(goseq_figure_dir, saved_figure_name), 
           plot=canvas2, width=8, height=5, bg = "transparent", units="in")
    
    png_figure_name = gsub("\n", "_", nm1)
    png_figure_name = paste0(gsub(" ", "_", png_figure_name), ".png")
    
    ggsave(file=file.path(goseq_figure_dir, png_figure_name), 
           plot=canvas2, width=1920, height=1200, bg = "transparent", units="px")
  }
  
}

gc()
##           used (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 1109617 59.3    2285195 122.1         NA  2285195 122.1
## Vcells 1949994 14.9    8388608  64.0      65536  3900572  29.8
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] tidyr_1.3.1       cowplot_1.1.3     stringr_1.5.1     ggrepel_0.9.5    
## [5] ggpubr_0.6.0      ggplot2_3.5.1     data.table_1.15.4
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1  xfun_0.47         bslib_0.8.0       purrr_1.0.2      
##  [5] carData_3.0-5     colorspace_2.1-1  vctrs_0.6.5       generics_0.1.3   
##  [9] htmltools_0.5.8.1 yaml_2.3.10       utf8_1.2.4        rlang_1.1.4      
## [13] jquerylib_0.1.4   pillar_1.9.0      glue_1.7.0        withr_3.0.1      
## [17] lifecycle_1.0.4   munsell_0.5.1     ggsignif_0.6.4    gtable_0.3.5     
## [21] ragg_1.2.7        evaluate_0.24.0   labeling_0.4.3    knitr_1.48       
## [25] fastmap_1.2.0     fansi_1.0.6       highr_0.11        broom_1.0.6      
## [29] Rcpp_1.0.13       scales_1.3.0      backports_1.5.0   cachem_1.1.0     
## [33] jsonlite_1.8.8    abind_1.4-5       farver_2.1.2      systemfonts_1.1.0
## [37] textshaping_0.3.7 digest_0.6.37     stringi_1.8.4     rstatix_0.7.2    
## [41] dplyr_1.1.4       grid_4.2.3        cli_3.6.3         tools_4.2.3      
## [45] magrittr_2.0.3    sass_0.4.9        tibble_3.2.1      car_3.1-2        
## [49] pkgconfig_2.0.3   rmarkdown_2.28    svglite_2.1.3     R6_2.5.1         
## [53] compiler_4.2.3